home *** CD-ROM | disk | FTP | other *** search
- /* String search routines for XEmacs.
- Copyright (C) 1985, 1986, 1987, 1992, 1993, 1994
- Free Software Foundation, Inc.
- Copyright (C) 1994, 1995 Amdahl Corporation.
-
- This file is part of XEmacs.
-
- XEmacs is free software; you can redistribute it and/or modify it
- under the terms of the GNU General Public License as published by the
- Free Software Foundation; either version 2, or (at your option) any
- later version.
-
- XEmacs is distributed in the hope that it will be useful, but WITHOUT
- ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
- for more details.
-
- You should have received a copy of the GNU General Public License
- along with XEmacs; see the file COPYING. If not, write to the Free
- Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA. */
-
- /* Synched up with: FSF 19.28. */
-
- /* Hacked on for Mule by Ben Wing, December 1994. */
-
- #include <config.h>
- #include "lisp.h"
-
- #include "buffer.h"
- #include "commands.h"
- #include "insdel.h"
- #include "mule.h"
- #include <sys/types.h>
- #include "regex.h"
- #include "syntax.h"
-
- #ifndef MULE_REGEXP
- #define EXTENDED_REGEXP_P(x) 0
- #endif
-
-
- /* We compile regexps into this buffer and then use it for searching.
- Every search entry point visible in this file (looking-at, string-match,
- fast_string_match, search_buffer) passes &searchbuf both to
- compile_pattern and to the re_* matcher, so a single compiled pattern
- is held at a time. */
-
- struct re_pattern_buffer searchbuf;
-
- static char search_fastmap[0400]; /* 0400 octal == 256 entries; not referenced in the part of the file reviewed here -- presumably installed as searchbuf.fastmap at init time, confirm */
-
- /* Last regexp we compiled.  compile_pattern compares its argument
- against this with EQ to skip recompilation, and resets it to Qnil
- before starting a new compile. */
- static Lisp_Object last_regexp;
-
- #ifdef MULE_REGEXP
- /* Regular expressions used in forward/backward-word */
- Lisp_Object Vforward_word_regexp, Vbackward_word_regexp;
-
- /* Version number of system internal regexp compiler and interpreter. */
-
- Lisp_Object Vregexp_version;
- #endif /* MULE_REGEXP */
-
- /* Every call to re_match, etc., must pass &search_regs as the regs argument
- unless you can show it is unnecessary (i.e., if re_match is certainly going
- to be called again before region-around-match can be called).
-
- Since the registers are now dynamically allocated, we need to make
- sure not to refer to the Nth register before checking that it has
- been allocated by checking search_regs.num_regs.
-
- The regex code keeps track of whether it has allocated the search
- buffer using bits in searchbuf. This means that whenever you
- compile a new pattern, it completely forgets whether it has
- allocated any registers, and will allocate new registers the next
- time you call a searching or matching function. Therefore, we need
- to call re_set_registers after compiling a new pattern or after
- setting the match registers, so that the regex functions will be
- able to free or re-allocate it properly. */
- static struct re_registers search_regs; /* filled in by looking-at, string-match and search_buffer; fast_string_match passes 0 instead */
- #ifdef NEW_SYNTAX
- # define SEARCH_NREGS(x) (x)->num_regs /* register count is read from the struct */
- #else
- # define SEARCH_NREGS(x) RE_NREGS /* fixed count; the argument is ignored */
- #endif
-
-
- /* The buffer in which the last search was performed, or
- Qt if the last search was done in a string;
- Qnil if no searching has been done yet. */
- static Lisp_Object last_thing_searched;
-
- /* error condition signalled when regexp compile_pattern fails */
- Lisp_Object Qinvalid_regexp;
-
- static void set_search_regs (Bytind, Bytecount);
-
- static void
- matcher_overflow (void) /* Called in this file whenever a regex search
- or match returns -2.  It currently does nothing: the only statement
- in the body is compiled out by the #if 0 below, so callers simply
- continue and treat -2 like any other negative (failure) result. */
- {
- #if 0 /* This is too much of a compatibility problem. */ /* #### GAG! */
- error ("Stack overflow in regexp matcher");
- #endif
- }
-
- /* Compile a regexp and signal a Lisp error if anything goes wrong.
-
- PATTERN is the regexp: a string, or (MULE_REGEXP only) a cons whose
- car is a pair of pre-compiled 4-element vectors.  BUFP receives the
- compiled pattern.  REGP, if non-null, is handed to re_set_registers
- when EMACS19_REGEXP is defined.  TRANSLATE is stored in
- BUFP->translate.  BACKWARD selects the cdr rather than the car of a
- pre-compiled pair.  If NO_ERROR is nonzero, a non-string or
- uncompilable PATTERN makes the function return 0 instead of
- signalling.
-
- Returns 1 on success, including the case where PATTERN is EQ to the
- previously compiled regexp (with the same TRANSLATE) and nothing is
- recompiled.  Note that on that early-return path re_set_registers is
- not called. */
- int
- compile_pattern (Lisp_Object pattern, struct re_pattern_buffer *bufp,
- struct re_registers *regp, char *translate,
- int backward, int no_error)
- {
- /* !!#### This function has not been Mule-ized */
- CONST char *val;
-
- if (EQ (pattern, last_regexp)
- && translate == bufp->translate
- #ifdef MULE_REGEXP
- && NILP (current_buffer->mc_flag) == !bufp->mc_flag
- && (!bufp->syntax_version
- || bufp->syntax_version == syntax_table_version)
- && (!bufp->category_version
- || bufp->category_version == category_table_version)
- #endif /* MULE_REGEXP */
- )
- return 1; /* cache hit: nothing to recompile */
-
- #ifdef MULE_REGEXP
- if (CONSP (pattern)) /* pre-compiled regexp */
- {
- Lisp_Object compiled;
-
- val = 0;
- pattern = XCAR (pattern);
- if (CONSP (pattern)
- && (compiled = backward ? XCDR (pattern) : XCAR (pattern))
- && XTYPE (compiled) == Lisp_Vector
- && XVECTOR (compiled)->size == 4)
- {
- /* set_pattern leaves bufp->allocated set to 0 */
- set_pattern (compiled, bufp, translate);
- return 1;
- }
-
- val = "Invalid pre-compiled regexp";
- goto invalid_regexp; /* jumps into the error branch further down */
- }
- #endif /* MULE_REGEXP */
-
- if (no_error)
- {
- if (!STRINGP (pattern))
- return 0;
- }
- else
- CHECK_STRING (pattern, 0);
-
- last_regexp = Qnil; /* forget the cached pattern until this compile succeeds */
- bufp->translate = translate;
- #ifdef MULE_REGEXP
- bufp->syntax_version = bufp->category_version = 0;
- /*
- 'bufp->allocated == 0' means bufp->buffer points to pre-compiled pattern
- in a lisp string, which should not be 'realloc'ed. */
- if (bufp->allocated == 0) bufp->buffer = 0;
- #endif /* MULE_REGEXP */
-
- val = re_compile_pattern ((char *) string_data (XSTRING (pattern)),
- string_length (XSTRING (pattern)),
- bufp);
- if (val) /* non-null VAL is the error message text */
- {
- #ifdef MULE_REGEXP
- invalid_regexp:
- #endif /* MULE_REGEXP */
- if (no_error)
- return 0;
- else
- signal_error (Qinvalid_regexp, list1 (build_string (val)));
- }
- last_regexp = pattern;
- #ifdef EMACS19_REGEXP
- /* Advise the searching functions about the space we have allocated
- for register data. */
- if (regp)
- re_set_registers (bufp, regp, regp->num_regs, regp->start, regp->end);
- #endif
- return 1;
- }
-
- #ifdef MULE_REGEXP
- /* Set a pre-compiled pattern into a pattern buffer.
-
-    PATTERN is a four-element vector (compile_pattern checks the type
-    and size before calling):
-      [0] compiled code, a string;
-      [1] fastmap, [2] syntax fastmap, [3] category fastmap,
-          each of which is used only when it is a string.
-    BUFP is the pattern buffer to fill in.  TRANSLATE is stored in
-    BUFP->translate.
-
-    BUFP->buffer is pointed directly at the Lisp string's data, so
-    BUFP->allocated is left at 0 to keep that storage from being freed
-    or realloc'ed as though it had been malloc'ed.
-
-    The return type used to be implicit int with no value returned.  It
-    is spelled out here (matching the implicit declaration produced by
-    the earlier call in compile_pattern) and 0 is always returned;
-    callers should not attach any meaning to it.  */
- int
- set_pattern (Lisp_Object pattern, struct re_pattern_buffer *bufp,
-              char *translate)
- {
-   /* !!#### This function has not been Mule-ized */
-   Lisp_Object code;
-   Lisp_Object fmap, syntax_fmap, category_fmap;
-   char *fastmap = bufp->fastmap;
-   int i;
-   unsigned char ch;
-
-   if (bufp->allocated != 0)
-     {
-       /* The buffer was holding an old-style (malloc'ed) pattern.
-          Release it; from now on BUFP->buffer only borrows the data of
-          a Lisp string, so BUFP->allocated must read 0 to stop anyone
-          from freeing or growing it later.  */
-       xfree (bufp->buffer);
-       bufp->allocated = 0;
-     }
-   code = vector_data (XVECTOR (pattern))[0];
-   bufp->buffer = (char *) string_data (XSTRING (code));
-   bufp->used = string_length (XSTRING (code));
-   bufp->translate = translate;
-   bufp->mc_flag = !NILP (current_buffer->mc_flag);
-
- #ifdef EMACS19_REGEXP
-   bufp->short_flag = 0;
-   bufp->no_empty = 0;
-
-   bufp->regs_allocated = REGS_UNALLOCATED;
-   bufp->re_nsub = 0;
-   bufp->no_sub = 0;
-   bufp->newline_anchor = 1;
-
-   bufp->syntax = 0;
-   bufp->not_bol = bufp->not_eol = 0;
- #endif /* EMACS19_REGEXP */
-
-   /* Set up the fastmap.  */
-   bufp->fastmap_accurate = 1;
-
-   /* Fetch all three maps before testing them.  Previously only FMAP
-      was loaded at this point, so the nil test below read the other
-      two variables while they were still uninitialized.  */
-   fmap = vector_data (XVECTOR (pattern))[1];
-   syntax_fmap = vector_data (XVECTOR (pattern))[2];
-   category_fmap = vector_data (XVECTOR (pattern))[3];
-
-   if (NILP (fmap) && NILP (syntax_fmap) && NILP (category_fmap))
-     {
-       bufp->can_be_null = 1;
-     }
-   else
-     {
-       bufp->can_be_null = 0;
-       memset (fastmap, 0, 256);
-       if (STRINGP (fmap))
-         memcpy (fastmap, string_data (XSTRING (fmap)),
-                 string_length (XSTRING (fmap)));
-
-       if (STRINGP (syntax_fmap))
-         {
-           /* Mark every ASCII character whose syntax code is flagged
-              in the syntax fastmap.
-              NOTE(review): SYNTAX is used with one argument here but
-              with two (table, char) in skip_chars -- confirm which
-              form the MULE_REGEXP build expects.  */
-           for (ch = 0; ch < 0x80; ch++)
-             if (!fastmap[ch]
-                 && string_char (XSTRING (syntax_fmap),
-                                 syntax_code_spec[(char) SYNTAX (ch)]))
-               fastmap[ch] = 1;
-           bufp->syntax_version = syntax_table_version;
-         }
-       else
-         bufp->syntax_version = 0;
-
-       if (STRINGP (category_fmap))
-         {
-           char str[96], *p;
-           int not_category_spec = 0;
-
-           /* An entry equal to 2 anywhere in 32..127 switches the
-              per-character test below to its "!= 2" form.  */
-           for (i = 32; i < 128; i++)
-             if (string_char (XSTRING (category_fmap), i) == 2)
-               {
-                 not_category_spec = 1;
-                 break;
-               }
-           for (ch = 0; ch < 0x80; ch++)
-             {
-               if (!fastmap[ch])
-                 {
-                   pack_mnemonic_string
-                     (char_category (ch, current_buffer->category_table),
-                      str);
-                   if (not_category_spec)
-                     {
-                       for (p = str; *p; p++)
-                         if (string_char (XSTRING (category_fmap), *p) != 2)
-                           {
-                             fastmap[ch] = 1;
-                             break;
-                           }
-                     }
-                   else
-                     {
-                       for (p = str; *p; p++)
-                         if (string_char (XSTRING (category_fmap), *p) == 1)
-                           {
-                             fastmap[ch] = 1;
-                             break;
-                           }
-                     }
-                 }
-             }
-           bufp->category_version = category_table_version;
-         }
-       else
-         bufp->category_version = 0;
-
-       /* With syntax or category maps in a multi-byte buffer, accept
-          every byte in 0x80..0x9F as a possible match start.  */
-       if (bufp->mc_flag
-           && (STRINGP (syntax_fmap) || STRINGP (category_fmap)))
-         {
-           for (ch = 0x80; ch < 0xA0; ch++)
-             fastmap[ch] = 1;
-         }
-     }
-
-   /* Force 're-compile-pattern' when compile_pattern is called next time. */
-   last_regexp = Qnil;
-   return 0;
- }
- #endif /* MULE_REGEXP */
-
- /* Error condition used for failing searches */
- Lisp_Object Qsearch_failed;
-
-
- /* looking-at: match PAT against the buffer text at point.
-
-    The match registers are rebased from BEGV-relative offsets to
-    absolute byte indices only when the match succeeds.  Doing so after
-    a failed match would add BEGV a second time to whatever positions an
-    earlier search had left in search_regs (assuming, as with GNU regex,
-    that a failed match does not rewrite the registers), corrupting the
-    match data.  */
- DEFUN ("looking-at", Flooking_at, Slooking_at, 1, 2, 0,
- "Return t if text after point matches regular expression PAT.\n\
- This function modifies the match data that `match-beginning',\n\
- `match-end' and `match-data' access; save and restore the match\n\
- data if you want to preserve them.\n\
- If support for Mule regexps was compiled into this Emacs, and optional\n\
- second arg BACK is non-nil and PAT is a pre-compiled pattern,\n\
- PAT is looked backward from point.")
- (string, back)
- Lisp_Object string, back;
- {
-   /* !!#### This function has not been Mule-ized */
-   struct buffer *buf = current_buffer;
-   Bytind p1, p2;
-   Bytecount s1, s2;
-   int i;
-   int matched;
-
-   compile_pattern (string, &searchbuf, &search_regs,
-                    (!NILP (buf->case_fold_search)
-                     ? string_ext_data (XSTRING (buf->downcase_table))
-                     : 0), !NILP (back), 0);
-
-   /* Backward search requires extended regexp. */
-   if (!NILP (back) && !EXTENDED_REGEXP_P (&searchbuf))
-     error ("Can't look backward with this pattern");
-
-   /* Get pointers and sizes of the two strings
-      that make up the visible portion of the buffer. */
-
-   p1 = BI_BUF_BEGV (buf);
-   p2 = BI_BUF_CEILING_OF (buf, p1);
-   s1 = p2 - p1;
-   s2 = BI_BUF_ZV (buf) - p2;
-
-   QUIT;
-
-   /* mstop, the 8th arg of re_match_2 is offset from BEGV. So, when */
-   /* backward search, it should be BEGV - BEGV, ie. 0 */
-   i = re_match_2 (&searchbuf, (unsigned char *) BI_BUF_BYTE_ADDRESS (buf, p1),
-                   s1, BI_BUF_BYTE_ADDRESS (buf, p2), s2,
-                   BI_BUF_PT (buf) - BI_BUF_BEGV (buf), &search_regs,
-                   !NILP (back) ? 0 : BI_BUF_ZV (buf) - BI_BUF_BEGV (buf)
- #ifdef MULE_REGEXP
-                   , !NILP (back)
- #endif
-                   );
-   if (i == -2)
-     matcher_overflow ();
-
-   matched = (0 <= i);
-   if (matched)
-     {
-       /* Convert BEGV-relative register offsets to buffer positions. */
-       for (i = 0; i < SEARCH_NREGS (&search_regs); i++)
-         if (search_regs.start[i] >= 0)
-           {
-             search_regs.start[i] += BI_BUF_BEGV (buf);
-             search_regs.end[i] += BI_BUF_BEGV (buf);
-           }
-     }
-   XSETBUFFER (last_thing_searched, buf);
-   return matched ? Qt : Qnil;
- }
-
- DEFUN ("string-match", Fstring_match, Sstring_match, 2, 3, 0,
- "Return index of start of first match for REGEXP in STRING, or nil.\n\
- If third arg START is non-nil, start search at that index in STRING.\n\
- For index of first char beyond the match, do (match-end 0).\n\
- `match-end' and `match-beginning' also give indices of substrings\n\
- matched by parenthesis constructs in the pattern.")
- (regexp, string, start)
- Lisp_Object regexp, string, start;
- {
-   /* !!#### This function has not been Mule-ized */
-   struct buffer *buf = current_buffer;
-   char *translate = 0;
-   int from = 0;
-   int found;
-
-   CHECK_STRING (string, 1);
-
-   if (!NILP (start))
-     {
-       Bytecount total = string_length (XSTRING (string));
-
-       CHECK_INT (start, 2);
-       from = XINT (start);
-       /* A negative START counts back from the end of STRING; anything
-          still outside 0..length is rejected.  */
-       if (from < 0 && -from <= total)
-         from = total + from;
-       else if (from < 0 || from > total)
-         args_out_of_range (string, start);
-     }
-
-   /* Fold case through the buffer's downcase table when requested.  */
-   if (!NILP (buf->case_fold_search))
-     translate = (char *) string_data (XSTRING (buf->downcase_table));
-
-   compile_pattern (regexp, &searchbuf, &search_regs, translate, 0, 0);
-   QUIT;
-   found = re_search (&searchbuf,
-                      (char *) string_data (XSTRING (string)),
-                      string_length (XSTRING (string)),
-                      from,
-                      string_length (XSTRING (string)) - from,
-                      &search_regs);
-   /* Record that the match data now refers to a string.  */
-   last_thing_searched = Qt;
-   if (found == -2)
-     matcher_overflow ();
-   return found < 0 ? Qnil : make_number (found);
- }
-
-
- /* Match REGEXP against a substring, searching all of it, and return
-    the index of the match relative to the start of the substring, or
-    negative on failure.  This does not clobber the match data (no
-    register structure is passed to re_search).
-
-    The text is given either as a C pointer NONRELOC or as a Lisp string
-    RELOC; the substring starts OFFSET bytes in and is LENGTH bytes
-    long.  LENGTH is first normalized by fixup_internal_substring
-    (presumably a negative LENGTH means "to the end" -- that helper is
-    defined elsewhere).
-
-    If NO_ERROR is nonzero, a bad REGEXP yields -1 instead of a Lisp
-    error.  If NO_QUIT is nonzero, QUIT is not checked here and
-    no_quit_in_re_search is set for the duration of the search.  */
-
- Bytecount
- fast_string_match (Lisp_Object regexp, CONST Bufbyte *nonreloc,
-                    Lisp_Object reloc, Bytecount offset,
-                    Bytecount length, int no_error, int no_quit)
- {
-   int val;
-   Bufbyte *text;
-
-   if (!compile_pattern (regexp, &searchbuf, 0, 0, 0, no_error))
-     return -1; /* will only do this when no_error */
-   if (!no_quit)
-     QUIT;
-   else
-     no_quit_in_re_search = 1;
-
-   fixup_internal_substring (nonreloc, reloc, offset, &length);
-
-   if (NILP (reloc))
-     text = (Bufbyte *) nonreloc + offset;
-   else if (no_quit)
-     text = string_data (XSTRING (reloc)) + offset;
-   else
-     {
-       /* QUIT could relocate RELOC.  Therefore we must alloca()
-          and copy.  No way around this except some serious
-          rewriting of re_search().
-
-          Copy exactly the substring being searched, i.e. LENGTH bytes
-          starting at OFFSET.  The old code copied LENGTH bytes from the
-          start of the string and then searched at copy + OFFSET, which
-          looked at the wrong bytes and ran past the end of the copy
-          whenever OFFSET was nonzero.  */
-       text = (Bufbyte *) alloca (length);
-       memcpy (text, string_data (XSTRING (reloc)) + offset, length);
-     }
-
-   val = re_search (&searchbuf, (char *) text, length, 0, length, 0);
-
-   no_quit_in_re_search = 0;
-   return val;
- }
-
- /* Search in BUF for COUNT instances of the character TARGET, starting
- at START and stopping at LIMIT. If COUNT is negative, search backwards.
- If LIMIT is <= 0, stop at end of accessible region of buffer (or
- beginning of accessible region, if COUNT is negative).
-
- If we find COUNT instances, set *SHORTAGE to zero, and return the
- position after the COUNTth match. Note that for reverse motion
- this is not the same as the usual convention for Emacs motion commands.
-
- If we don't find COUNT instances before reaching LIMIT, set *SHORTAGE
- to the number of TARGETs left unfound, and return the end of the
- buffer we bumped up against.
-
- SHORTAGE may be a null pointer, in which case nothing is stored.
- COUNT must be nonzero (this is asserted). START, LIMIT and the return
- value are character positions (Bufpos); the scan itself runs on byte
- indices (Bytind) and converts back with bytind_to_bufpos.
-
- If ALLOW_QUIT is non-zero, QUIT is checked once after the scan, just
- before returning. */
-
- Bufpos
- scan_buffer (struct buffer *buf, Emchar target, Bufpos start, Bufpos limit,
- int count, int *shortage, int allow_quit)
- {
- Bytind lim = limit > 0 ? bufpos_to_bytind (buf, limit) :
- ((count > 0) ? BI_BUF_ZV (buf) : BI_BUF_BEGV (buf));
- Bytind st = bufpos_to_bytind (buf, start);
-
- assert (count != 0);
-
- if (shortage)
- *shortage = 0;
-
- if (count > 0) /* forward scan */
- {
- #ifdef MULE
- /* Due to the Mule representation of characters in a buffer,
- we can simply search for characters in the range 0 - 127
- directly. For other characters, we do it the "hard" way.
- Note that this way works for all characters but the other
- way is faster. */
- if (target >= 0200) /* 0200 octal == 128 */
- {
- while (st < lim && count > 0)
- {
- if (BI_BUF_FETCH_CHAR (buf, st) == target)
- count--;
- INC_BYTIND (buf, st);
- }
- }
- else
- #endif
- {
- while (st < lim && count > 0)
- {
- Bytind ceil;
- Bufbyte *bufptr;
-
- ceil = BI_BUF_CEILING_OF (buf, st);
- ceil = min (lim, ceil); /* never scan beyond LIM */
- bufptr = memchr (BI_BUF_BYTE_ADDRESS (buf, st), (int) target,
- ceil - st);
- if (bufptr)
- {
- count--;
- st = BI_BUF_PTR_BYTE_POS (buf, bufptr) + 1; /* resume just after the hit */
- }
- else
- st = ceil; /* nothing in this stretch; move on to the next one */
- }
- }
-
- if (shortage)
- *shortage = count; /* number of matches still missing (0 if all found) */
- if (allow_quit)
- QUIT;
- return bytind_to_bufpos (buf, st);
- }
- else /* backward scan; COUNT is negative and counts up towards 0 */
- {
- #ifdef MULE
- if (target >= 0200)
- {
- while (st > lim && count < 0)
- {
- DEC_BYTIND (buf, st);
- if (BI_BUF_FETCH_CHAR (buf, st) == target)
- count++;
- }
- }
- else
- #endif
- {
- while (st > lim && count < 0)
- {
- Bytind floor;
- Bufbyte *bufptr;
- Bufbyte *floorptr;
-
- floor = BI_BUF_FLOOR_OF (buf, st);
- floor = max (lim, floor); /* never scan below LIM */
- /* No memrchr() ... */
- bufptr = BI_BUF_BYTE_ADDRESS_BEFORE (buf, st);
- floorptr = BI_BUF_BYTE_ADDRESS (buf, floor);
- while (bufptr >= floorptr)
- {
- st--;
- /* At this point, both ST and BUFPTR refer to the same
- character. When the loop terminates, ST will
- always point to the last character we tried. */
- if (* (unsigned char *) bufptr == (unsigned char) target)
- {
- count++;
- break;
- }
- bufptr--;
- }
- }
- }
-
- if (shortage)
- *shortage = -count; /* reported as a non-negative number */
- if (allow_quit)
- QUIT;
- if (count) /* ran into LIM before finding them all */
- return bytind_to_bufpos (buf, st);
- else
- {
- /* We found the character we were looking for; we have to return
- the position *after* it due to the strange way that the return
- value is defined. */
- INC_BYTIND (buf, st);
- return bytind_to_bufpos (buf, st);
- }
- }
- }
-
- /* Locate the CNT'th newline in BUF starting from FROM (backwards when
-    CNT is negative) via scan_buffer, with no explicit limit, no
-    shortage report, and without checking QUIT.  */
- Bufpos
- find_next_newline_no_quit (struct buffer *buf, Bufpos from, int cnt)
- {
-   int *no_shortage = (int *) 0;
-   Bufpos result;
-
-   result = scan_buffer (buf, '\n', from, 0, cnt, no_shortage, 0);
-   return result;
- }
-
- /* Locate the CNT'th newline in BUF starting from FROM (backwards when
-    CNT is negative) via scan_buffer, with no explicit limit and no
-    shortage report.  QUIT is checked by scan_buffer.  */
- Bufpos
- find_next_newline (struct buffer *buf, Bufpos from, int cnt)
- {
-   int *no_shortage = (int *) 0;
-   Bufpos result;
-
-   result = scan_buffer (buf, '\n', from, 0, cnt, no_shortage, 1);
-   return result;
- }
-
- static Lisp_Object
- skip_chars (struct buffer *buf, int forwardp, int syntaxp,
- Lisp_Object string, Lisp_Object lim) /* Shared worker for the
- skip-chars-* and skip-syntax-* primitives.  Moves point in BUF
- forward (FORWARDP nonzero) or backward over characters selected by
- STRING, never past LIM.  A nil LIM means the end (or beginning) of
- the accessible region; LIM is always clamped to BEGV..ZV.  A leading
- `^' in STRING negates the selection.  In the non-MULE_REGEXP build,
- a nonzero SYNTAXP makes each character of STRING a syntax code and
- buffer characters are tested through syntax_code_spec; otherwise
- STRING is a character set in which a backslash quotes the next
- character and `a-z' denotes a range.  The MULE_REGEXP build never
- looks at SYNTAXP.  Returns the signed distance point moved, as a
- Lisp integer. */
- {
- /* !!#### This function has not been Mule-ized */
- unsigned char *p, *pend;
- /* jwz: c must be bigger than char, else (skip-chars-forward "\200-\377")
- loops while trying to fill fastmap, as c++ wraps when c == 255. */
- unsigned int c;
- unsigned char fastmap[0400];
- int negate = 0;
- #ifdef MULE_REGEXP
- unsigned char *b;
- struct compile_charset_information info, *ip = &info;
- #endif /* MULE_REGEXP */
- int i;
- Lisp_Object syntax_table = buf->syntax_table;
-
- CHECK_STRING (string, 0);
-
- if (NILP (lim))
- XSETINT (lim, forwardp ? BUF_ZV (buf) : BUF_BEGV (buf));
- else
- CHECK_INT_COERCE_MARKER (lim, 1);
-
- /* In any case, don't allow scan outside bounds of buffer. */
- if (XINT (lim) > BUF_ZV (buf))
- lim = make_number (BUF_ZV (buf));
- if (XINT (lim) < BUF_BEGV (buf))
- lim = make_number (BUF_BEGV (buf));
-
- p = string_data (XSTRING (string));
- pend = p + string_length (XSTRING (string));
- memset (fastmap, 0, sizeof (fastmap));
-
- if (p != pend && *p == '^')
- {
- negate = 1;
- p++;
- }
-
- #ifdef MULE_REGEXP
-
- /* !!#### What's going on here? */
- b = (unsigned char *) alloca (1 + (1 << 8) / 8 + 2 + (pend - p) * 2);
- if (b == 0) error ("not enough memory");
-
- /* Find the characters specified and set their elements of fastmap. */
-
- init_compile_charset_information (ip, b, p, pend, 0, mc_flag, 1); /* NOTE(review): mc_flag is not declared in this function -- confirm where it is supposed to come from */
-
- if (compile_charset (ip))
- error ("maybe invalid charset");
- b[0] = ip->bitmap_size;
- if (ip->rt_used - ip->range_table) /* a range table is present */
- {
- b[0] |= 0x80;
- b[ip->bitmap_size+1] = (ip->rt_used - ip->range_table) >> 8; /* high byte of the table length */
- b[ip->bitmap_size+2] = (ip->rt_used - ip->range_table) & 0xff; /* low byte */
- }
-
- {
- Bufpos start_point = BUF_PT (buf);
-
- if (forwardp)
- {
- while (BUF_PT (buf) < XINT (lim))
- {
- c = BUF_FETCH_CHAR (buf, BUF_PT (buf));
-
- if (lookup_charset (b, c, 0, 0) == negate) /* stop at the first character outside the (possibly negated) set */
- break;
-
- BUF_SET_PT (buf, BUF_PT (buf) + 1);
- }
- }
- else
- {
- while (BUF_PT (buf) > XINT (lim))
- {
- Bufpos pos;
-
- pos = BUF_PT (buf) - 1;
- c = BUF_FETCH_CHAR (buf, pos);
-
- if (lookup_charset (b, c, 0, 0) == negate)
- break;
-
- BUF_SET_PT (buf, pos);
- }
- }
-
- QUIT;
- return make_number (BUF_PT (buf) - start_point);
- }
-
- #else /* !MULE_REGEXP */
-
- /* Find the characters specified and set their elements of fastmap.
- If syntaxp, each character counts as itself.
- Otherwise, handle backslashes and ranges specially */
-
- while (p != pend)
- {
- c = *p++;
- if (syntaxp)
- fastmap[c] = 1;
- else
- {
- if (c == '\\')
- {
- if (p == pend) break; /* trailing backslash: nothing left to quote */
- c = *p++;
- }
- if (p != pend && *p == '-')
- {
- p++;
- if (p == pend) break; /* trailing `-': the range has no upper end */
- while (c <= *p)
- {
- fastmap[c] = 1;
- c++;
- }
- p++;
- }
- else
- fastmap[c] = 1;
- }
- }
-
- if (syntaxp && fastmap['-'] != 0) /* when `-' was given as a syntax code, also accept ` ' */
- fastmap[' '] = 1;
-
- /* If ^ was the first character, complement the fastmap. */
-
- if (negate)
- for (i = 0; i < sizeof fastmap; i++)
- fastmap[i] ^= 1;
-
- {
- Bufpos start_point = BUF_PT (buf);
-
- if (syntaxp)
- {
-
- if (forwardp)
- {
- while (BUF_PT (buf) < XINT (lim)
- && fastmap[(unsigned char)
- syntax_code_spec[(int) SYNTAX (syntax_table,
- BUF_FETCH_CHAR (buf, BUF_PT (buf)))]])
- BUF_SET_PT (buf, BUF_PT (buf) + 1);
- }
- else
- {
- while (BUF_PT (buf) > XINT (lim)
- && fastmap[(unsigned char)
- syntax_code_spec[(int) SYNTAX (syntax_table,
- BUF_FETCH_CHAR (buf, BUF_PT (buf) - 1))]])
- BUF_SET_PT (buf, BUF_PT (buf) - 1);
- }
- }
- else
- {
- if (forwardp)
- {
- while (BUF_PT (buf) < XINT (lim) && fastmap[BUF_FETCH_CHAR (buf, BUF_PT (buf))]) /* NOTE(review): indexes the 256-entry fastmap with the fetched character -- confirm it cannot exceed 255 in this build */
- BUF_SET_PT (buf, BUF_PT (buf) + 1);
- }
- else
- {
- while (BUF_PT (buf) > XINT (lim) && fastmap[BUF_FETCH_CHAR (buf, BUF_PT (buf) - 1)])
- BUF_SET_PT (buf, BUF_PT (buf) - 1);
- }
- }
- QUIT;
- return make_number (BUF_PT (buf) - start_point);
- }
-
- #endif /* MULE_REGEXP */
- }
-
- DEFUN ("skip-chars-forward", Fskip_chars_forward, Sskip_chars_forward, 1, 3, 0,
- "Move point forward, stopping before a char not in CHARS, or at position LIM.\n\
- CHARS is like the inside of a `[...]' in a regular expression\n\
- except that `]' is never special and `\\' quotes `^', `-' or `\\'.\n\
- Thus, with arg \"a-zA-Z\", this skips letters stopping before first nonletter.\n\
- With arg \"^a-zA-Z\", skips nonletters stopping before first letter.\n\
- Returns the distance traveled, either zero or positive.\n\
- \n\
- Optional argument BUFFER defaults to the current buffer.")
- (chars, lim, buffer)
- Lisp_Object chars, lim, buffer;
- {
-   struct buffer *target = decode_buffer (buffer, 0);
-
-   /* skip_chars with forwardp = 1, syntaxp = 0.  */
-   return skip_chars (target, 1, 0, chars, lim);
- }
-
- DEFUN ("skip-chars-backward", Fskip_chars_backward, Sskip_chars_backward, 1, 3, 0,
- "Move point backward, stopping after a char not in CHARS, or at position LIM.\n\
- See `skip-chars-forward' for details.\n\
- Returns the distance traveled, either zero or negative.\n\
- \n\
- Optional argument BUFFER defaults to the current buffer.")
- (chars, lim, buffer)
- Lisp_Object chars, lim, buffer;
- {
-   struct buffer *target = decode_buffer (buffer, 0);
-
-   /* skip_chars with forwardp = 0, syntaxp = 0.  */
-   return skip_chars (target, 0, 0, chars, lim);
- }
-
-
- DEFUN ("skip-syntax-forward", Fskip_syntax_forward, Sskip_syntax_forward, 1, 3, 0,
- "Move point forward across chars in specified syntax classes.\n\
- SYNTAX is a string of syntax code characters.\n\
- Stop before a char whose syntax is not in SYNTAX, or at position LIM.\n\
- If SYNTAX starts with ^, skip characters whose syntax is NOT in SYNTAX.\n\
- This function returns the distance traveled, either zero or positive.\n\
- \n\
- Optional argument BUFFER defaults to the current buffer.")
- (syntax, lim, buffer)
- Lisp_Object syntax, lim, buffer;
- {
-   struct buffer *target = decode_buffer (buffer, 0);
-
-   /* skip_chars with forwardp = 1, syntaxp = 1.  */
-   return skip_chars (target, 1, 1, syntax, lim);
- }
-
- DEFUN ("skip-syntax-backward", Fskip_syntax_backward, Sskip_syntax_backward, 1, 3, 0,
- "Move point backward across chars in specified syntax classes.\n\
- SYNTAX is a string of syntax code characters.\n\
- Stop on reaching a char whose syntax is not in SYNTAX, or at position LIM.\n\
- If SYNTAX starts with ^, skip characters whose syntax is NOT in SYNTAX.\n\
- This function returns the distance traveled, either zero or negative.\n\
- \n\
- Optional argument BUFFER defaults to the current buffer.")
- (syntax, lim, buffer)
- Lisp_Object syntax, lim, buffer;
- {
-   struct buffer *target = decode_buffer (buffer, 0);
-
-   /* skip_chars with forwardp = 0, syntaxp = 1.  */
-   return skip_chars (target, 0, 1, syntax, lim);
- }
-
-
- /* Subroutines of Lisp buffer search functions. */
-
- static Bytind search_buffer (Lisp_Object str, Bytind pos, Bytind lim, int n,
- int RE, unsigned char *trt,
- unsigned char *inverse_trt);
-
- /* Common driver behind the Lisp search commands.
-
-    Searches the current buffer from point for STRING, COUNT times
-    (default once) in DIRECTION (+1 or -1, multiplied into the repeat
-    count), as a regexp when RE is nonzero.  BOUND, if non-nil, limits
-    the search and must lie on the correct side of point.
-
-    On success point is moved to the position search_buffer returns and
-    that position is returned as a Lisp integer.  On failure: a nil
-    NO_ERROR signals search-failed; NO_ERROR eq t returns nil leaving
-    point alone; any other NO_ERROR moves point to the limit and
-    returns nil.  */
- static Lisp_Object
- search_command (Lisp_Object string, Lisp_Object bound, Lisp_Object no_error,
-                 Lisp_Object count, int direction, int RE)
- {
-   /* !!#### This function has not been Mule-ized */
-   struct buffer *buf = current_buffer;
-   unsigned char *trt = 0;
-   unsigned char *inverse_trt = 0;
-   long n = direction;
-   Bufpos lim;
-   int np;
-
-   if (!NILP (count))
-     {
-       CHECK_INT (count, 3);
-       n *= XINT (count);
-     }
-
-   /* string is now checked in compile_pattern, which is called from
-      search_buffer. */
-   if (!NILP (bound))
-     {
-       CHECK_INT_COERCE_MARKER (bound, 1);
-       lim = XINT (bound);
-       if (n > 0 ? lim < BUF_PT (buf) : lim > BUF_PT (buf))
-         error ("Invalid search bound (wrong side of point)");
-       /* Keep the limit inside the accessible region.  */
-       if (lim > BUF_ZV (buf))
-         lim = BUF_ZV (buf);
-       if (lim < BUF_BEGV (buf))
-         lim = BUF_BEGV (buf);
-     }
-   else if (n > 0)
-     lim = BUF_ZV (buf);
-   else
-     lim = BUF_BEGV (buf);
-
-   /* Case folding supplies both the translation table and its inverse.  */
-   if (!NILP (buf->case_fold_search))
-     {
-       trt = string_data (XSTRING (buf->case_canon_table));
-       inverse_trt = string_data (XSTRING (buf->case_eqv_table));
-     }
-
-   np = search_buffer (string, BUF_PT (buf), lim, n, RE, trt, inverse_trt);
-
-   if (np <= 0)
-     {
-       /* The search failed; NO_ERROR decides what happens next.  */
-       if (NILP (no_error))
-         {
-           Fsignal (Qsearch_failed, list1 (string));
-           return Qnil;
-         }
-       if (EQ (no_error, Qt))
-         return Qnil;
-
-       if (lim < BUF_BEGV (buf) || lim > BUF_ZV (buf))
-         abort ();
-       BUF_SET_PT (buf, lim);
-       /* Returning LIM here would be clean, but maybe programs depend
-          on a value of nil.  */
-       return Qnil;
-     }
-
-   if (np < BUF_BEGV (buf) || np > BUF_ZV (buf))
-     abort ();
-
-   BUF_SET_PT (buf, np);
-
-   return make_number (np);
- }
-
- /* Search for the n'th occurrence of STRING in the current buffer,
- starting at position POS and stopping at position LIM,
- treating PAT as a literal string if RE is false or as
- a regular expression if RE is true.
-
- If N is positive, searching is forward and LIM must be greater than POS.
- If N is negative, searching is backward and LIM must be less than POS.
-
- Returns -x if only N-x occurrences found (x > 0),
- or else the position at the beginning of the Nth occurrence
- (if searching backward) or the end (if searching forward). */
-
- static Bytind
- search_buffer (Lisp_Object string, Bytind pos, Bytind lim, int n, int RE,
- unsigned char *trt, unsigned char *inverse_trt)
- {
- /* !!#### This function has not been Mule-ized */
- Bytecount len = 0;
- Bufbyte *base_pat = 0;
- int *BM_tab;
- int *BM_tab_base;
- int direction = ((n > 0) ? 1 : -1);
- int dirlen;
- int infinity, limit, k, stride_for_teases = 0;
- Bufbyte *pat = 0;
- Bufbyte *cursor, *p_limit, *ptr2;
- int i, j;
- Bytind p1, p2;
- Bytecount s1, s2;
- struct buffer *buf = current_buffer;
-
- if (!RE)
- {
- CHECK_STRING (string, 0);
- base_pat = string_data (XSTRING (string));
- len = string_length (XSTRING (string));
-
- /* Null string is found at starting position. */
- if (len == 0)
- {
- set_search_regs (pos, 0);
- return pos;
- }
- }
- else /* type check of string also done. */
- compile_pattern (string, &searchbuf, &search_regs,
- (char *) trt, direction < 0, 0);
-
- /* Searching 0 times means don't move. */
- if (n == 0)
- return pos;
-
- if (RE /* Here we detect whether the */
- /* generality of an RE search is */
- /* really needed. */
- /* first item is "exact match" */
- #ifdef EMACS19_REGEXP
- && *(searchbuf.buffer) == (char) RE_EXACTN_VALUE
- #else
- && *(searchbuf.buffer) == exactn
- #endif
- && searchbuf.buffer[1] + 2 == searchbuf.used) /*first is ONLY item */
- {
- RE = 0; /* can do straight (non RE) search */
- pat = (base_pat = (unsigned char *) searchbuf.buffer + 2);
- /* trt already applied */
- len = searchbuf.used - 2;
- }
- else if (!RE)
- {
- pat = (Bufbyte *) alloca (len * sizeof (Bufbyte));
-
- for (i = len; i--;) /* Copy the pattern; apply trt */
- *pat++ = (((int) trt) ? trt [*base_pat++] : *base_pat++);
- pat -= len; base_pat = pat;
- }
-
- if (RE)
- {
- /* Get pointers and sizes of the two strings
- that make up the visible portion of the buffer. */
-
- p1 = BI_BUF_BEGV (buf);
- p2 = BI_BUF_CEILING_OF (buf, p1);
- s1 = p2 - p1;
- s2 = BI_BUF_ZV (buf) - p2;
-
- while (n < 0)
- {
- int val;
- QUIT;
- val = re_search_2 (&searchbuf,
- (char *) BI_BUF_BYTE_ADDRESS (buf, p1), s1,
- (char *) BI_BUF_BYTE_ADDRESS (buf, p2), s2,
- pos - BI_BUF_BEGV (buf), lim - pos, &search_regs,
- /* Don't allow match past current point */
- /* mstop for backward search is */
- /* BEGV - BEGV */
- (EXTENDED_REGEXP_P (&searchbuf) ? 0 :
- pos - BI_BUF_BEGV (buf))
- );
- if (val == -2)
- matcher_overflow ();
- if (val >= 0)
- {
- j = BI_BUF_BEGV (buf);
- for (i = 0; i < SEARCH_NREGS (&search_regs); i++)
- if (search_regs.start[i] >= 0)
- {
- search_regs.start[i] += j;
- search_regs.end[i] += j;
- }
- XSETBUFFER (last_thing_searched, buf);
- /* Set pos to the new position. */
- pos = search_regs.start[0];
- }
- else
- {
- return (n);
- }
- n++;
- }
- while (n > 0)
- {
- int val;
- QUIT;
- val = re_search_2 (&searchbuf,
- (char *) BI_BUF_BYTE_ADDRESS (buf, p1), s1,
- (char *) BI_BUF_BYTE_ADDRESS (buf, p2), s2,
- pos - BI_BUF_BEGV (buf), lim - pos, &search_regs,
- lim - BI_BUF_BEGV (buf));
- if (val == -2)
- matcher_overflow ();
- if (val >= 0)
- {
- j = BI_BUF_BEGV (buf);
- for (i = 0; i < SEARCH_NREGS (&search_regs); i++)
- if (search_regs.start[i] >= 0)
- {
- search_regs.start[i] += j;
- search_regs.end[i] += j;
- }
- XSETBUFFER (last_thing_searched, buf);
- pos = search_regs.end[0];
- }
- else
- {
- return (0 - n);
- }
- n--;
- }
- return (pos);
- }
- else /* non-RE case */
- {
- #ifdef C_ALLOCA
- int BM_tab_space[0400];
- BM_tab = &BM_tab_space[0];
- #else
- BM_tab = (int *) alloca (0400 * sizeof (int));
- #endif
- /* The general approach is that we are going to maintain that we know */
- /* the first (closest to the present position, in whatever direction */
- /* we're searching) character that could possibly be the last */
- /* (furthest from present position) character of a valid match. We */
- /* advance the state of our knowledge by looking at that character */
- /* and seeing whether it indeed matches the last character of the */
- /* pattern. If it does, we take a closer look. If it does not, we */
- /* move our pointer (to putative last characters) as far as is */
- /* logically possible. This amount of movement, which I call a */
- /* stride, will be the length of the pattern if the actual character */
- /* appears nowhere in the pattern, otherwise it will be the distance */
- /* from the last occurrence of that character to the end of the */
- /* pattern. */
- /* As a coding trick, an enormous stride is coded into the table for */
- /* characters that match the last character. This allows use of only */
- /* a single test, a test for having gone past the end of the */
- /* permissible match region, to test for both possible matches (when */
- /* the stride goes past the end immediately) and failure to */
- /* match (where you get nudged past the end one stride at a time). */
-
- /* Here we make a "mickey mouse" BM table. The stride of the search */
- /* is determined only by the last character of the putative match. */
- /* If that character does not match, we will stride the proper */
- /* distance to propose a match that superimposes it on the last */
- /* instance of a character that matches it (per trt), or misses */
- /* it entirely if there is none. */
-
- dirlen = len * direction;
- infinity = dirlen - (lim + pos + len + len) * direction;
- if (direction < 0)
- pat = (base_pat += len - 1);
- BM_tab_base = BM_tab;
- BM_tab += 0400;
- j = dirlen; /* to get it in a register */
- /* A character that does not appear in the pattern induces a */
- /* stride equal to the pattern length. */
- while (BM_tab_base != BM_tab)
- {
- *--BM_tab = j;
- *--BM_tab = j;
- *--BM_tab = j;
- *--BM_tab = j;
- }
- i = 0;
- while (i != infinity)
- {
- j = pat[i]; i += direction;
- if (i == dirlen) i = infinity;
- if ((int) trt)
- {
- k = (j = trt[j]);
- if (i == infinity)
- stride_for_teases = BM_tab[j];
- BM_tab[j] = dirlen - i;
- /* A translation table is accompanied by its inverse -- see */
- /* comment following downcase_table for details */
-
- while ((j = inverse_trt[j]) != k)
- BM_tab[j] = dirlen - i;
- }
- else
- {
- if (i == infinity)
- stride_for_teases = BM_tab[j];
- BM_tab[j] = dirlen - i;
- }
- /* stride_for_teases tells how much to stride if we get a */
- /* match on the far character but are subsequently */
- /* disappointed, by recording what the stride would have been */
- /* for that character if the last character had been */
- /* different. */
- }
- infinity = dirlen - infinity;
- pos += dirlen - ((direction > 0) ? direction : 0);
- /* loop invariant - pos points at where last char (first char if reverse)
- of pattern would align in a possible match. */
- while (n != 0)
- {
- if ((lim - pos - (direction > 0)) * direction < 0)
- return (n * (0 - direction));
- /* First we do the part we can by pointers (maybe nothing) */
- QUIT;
- pat = base_pat;
- limit = pos - dirlen + direction;
- /* XEmacs change: definitions of CEILING_OF and FLOOR_OF
- have changed. See buffer.h. */
- limit = ((direction > 0)
- ? BI_BUF_CEILING_OF (buf, limit) - 1
- : BI_BUF_FLOOR_OF (buf, limit + 1));
- /* LIMIT is now the last (not beyond-last!) value
- POS can take on without hitting edge of buffer or the gap. */
- limit = ((direction > 0)
- ? min (lim - 1, min (limit, pos + 20000))
- : max (lim, max (limit, pos - 20000)));
- if ((limit - pos) * direction > 20)
- {
- p_limit = BI_BUF_BYTE_ADDRESS (buf, limit);
- ptr2 = (cursor = BI_BUF_BYTE_ADDRESS (buf, pos));
- /* In this loop, pos + cursor - ptr2 is the surrogate for pos */
- while (1) /* use one cursor setting as long as i can */
- {
- if (direction > 0) /* worth duplicating */
- {
- /* Use signed comparison if appropriate
- to make cursor+infinity sure to be > p_limit.
- Assuming that the buffer lies in a range of addresses
- that are all "positive" (as ints) or all "negative",
- either kind of comparison will work as long
- as we don't step by infinity. So pick the kind
- that works when we do step by infinity. */
- if ((int) (p_limit + infinity) > (int) p_limit)
- while ((int) cursor <= (int) p_limit)
- cursor += BM_tab[*cursor];
- else
- while ((unsigned int) cursor <= (unsigned int) p_limit)
- cursor += BM_tab[*cursor];
- }
- else
- {
- if ((int) (p_limit + infinity) < (int) p_limit)
- while ((int) cursor >= (int) p_limit)
- cursor += BM_tab[*cursor];
- else
- while ((unsigned int) cursor >= (unsigned int) p_limit)
- cursor += BM_tab[*cursor];
- }
- /* If you are here, cursor is beyond the end of the searched region. */
- /* This can happen if you match on the far character of the pattern, */
- /* because the "stride" of that character is infinity, a number able */
- /* to throw you well beyond the end of the search. It can also */
- /* happen if you fail to match within the permitted region and would */
- /* otherwise try a character beyond that region */
- if ((cursor - p_limit) * direction <= len)
- break; /* a small overrun is genuine */
- cursor -= infinity; /* large overrun = hit */
- i = dirlen - direction;
- if ((int) trt)
- {
- while ((i -= direction) + direction != 0)
- if (pat[i] != trt[*(cursor -= direction)])
- break;
- }
- else
- {
- while ((i -= direction) + direction != 0)
- if (pat[i] != *(cursor -= direction))
- break;
- }
- cursor += dirlen - i - direction; /* fix cursor */
- if (i + direction == 0)
- {
- cursor -= direction;
-
- set_search_regs (pos + cursor - ptr2 + ((direction > 0)
- ? 1 - len : 0),
- len);
-
- if ((n -= direction) != 0)
- cursor += dirlen; /* to resume search */
- else
- return ((direction > 0)
- ? search_regs.end[0] : search_regs.start[0]);
- }
- else
- cursor += stride_for_teases; /* <sigh> we lose - */
- }
- pos += cursor - ptr2;
- }
- else
- /* Now we'll pick up a clump that has to be done the hard */
- /* way because it covers a discontinuity */
- {
- /* XEmacs change: definitions of CEILING_OF and FLOOR_OF
- have changed. See buffer.h. */
- limit = ((direction > 0)
- ? BI_BUF_CEILING_OF (buf, pos - dirlen + 1) - 1
- : BI_BUF_FLOOR_OF (buf, pos - dirlen));
- limit = ((direction > 0)
- ? min (limit + len, lim - 1)
- : max (limit - len, lim));
- /* LIMIT is now the last value POS can have
- and still be valid for a possible match. */
- while (1)
- {
- /* This loop can be coded for space rather than */
- /* speed because it will usually run only once. */
- /* (the reach is at most len + 21, and typically */
- /* does not exceed len) */
- while ((limit - pos) * direction >= 0)
- pos += BM_tab[BI_BUF_FETCH_CHAR (buf, pos)];
- /* now run the same tests to distinguish going off the */
- /* end, a match or a phony match. */
- if ((pos - limit) * direction <= len)
- break; /* ran off the end */
- /* Found what might be a match.
- Set POS back to last (first if reverse) char pos. */
- pos -= infinity;
- i = dirlen - direction;
- while ((i -= direction) + direction != 0)
- {
- pos -= direction;
- if (pat[i] != (((int) trt)
- ? trt[BI_BUF_FETCH_CHAR (buf, pos)]
- : BI_BUF_FETCH_CHAR (buf, pos)))
- break;
- }
- /* Above loop has moved POS part or all the way
- back to the first char pos (last char pos if reverse).
- Set it once again at the last (first if reverse) char. */
- pos += dirlen - i- direction;
- if (i + direction == 0)
- {
- pos -= direction;
-
- set_search_regs (pos + ((direction > 0) ? 1 - len : 0),
- len);
-
- if ((n -= direction) != 0)
- pos += dirlen; /* to resume search */
- else
- return ((direction > 0)
- ? search_regs.end[0] : search_regs.start[0]);
- }
- else
- pos += stride_for_teases;
- }
- }
- /* We have done one clump. Can we continue? */
- if ((lim - pos) * direction < 0)
- return ((0 - n) * direction);
- }
- return pos;
- }
- }
-
- /* Record beginning BEG and end BEG + LEN
-    for a match just found in the current buffer.
-
-    Only register 0 (the whole match) is meaningful after a plain string
-    search, so every other register is marked unmatched (-1).  Without
-    this, sub-match positions left over from an earlier regexp search
-    (or never initialized at all when the registers were just allocated)
-    would still be reported by match_limit and Fmatch_data, and used by
-    the \N escape in Freplace_match.
-
-    Also records the current buffer in last_thing_searched.  */
-
- static void
- set_search_regs (int beg, int len)
- {
-   /* !!#### This function has not been Mule-ized */
-   int i;
-
- #ifdef EMACS19_REGEXP
-   /* Make sure we have registers in which to store
-      the match position. */
-   if (search_regs.num_regs == 0)
-     {
-       regoff_t *starts, *ends;
-
-       starts = (regoff_t *) xmalloc (2 * sizeof (regoff_t));
-       ends = (regoff_t *) xmalloc (2 * sizeof (regoff_t));
-       re_set_registers (&searchbuf,
-                         &search_regs,
-                         2, starts, ends);
-     }
- #endif /* EMACS19_REGEXP */
-
-   /* Clear out the other registers so that stale sub-matches
-      cannot leak into the result of this search. */
-   for (i = 1; i < SEARCH_NREGS (&search_regs); i++)
-     {
-       search_regs.start[i] = -1;
-       search_regs.end[i] = -1;
-     }
-
-   search_regs.start[0] = beg;
-   search_regs.end[0] = beg + len;
-   XSETBUFFER (last_thing_searched, current_buffer);
- }
-
-
- /* Given a string of words separated by word delimiters,
-    compute a regexp that matches those exact words
-    separated by arbitrary punctuation.
-
-    STRING must be a Lisp string (CHECK_STRING signals otherwise).  A
-    character belongs to a word when its syntax in the current buffer's
-    syntax table is Sword.  The result has the shape
-    \bWORD1\W\W*WORD2...\b ; if STRING contains no word character at
-    all (in particular if it is empty) the empty string is returned.
-
-    The regexp is assembled in a stack buffer and only then turned into
-    a Lisp string, so nothing is ever read beyond the end of STRING's
-    data. */
-
- static Lisp_Object
- wordify (Lisp_Object string)
- {
-   /* !!#### This function has not been Mule-ized */
-   unsigned char *p, *o, *temp;
-   int i, len, punct_count = 0, word_count = 0;
-   Lisp_Object syntax_table = current_buffer->syntax_table;
-
-   CHECK_STRING (string, 0);
-   p = string_data (XSTRING (string));
-   len = string_length (XSTRING (string));
-
-   /* An empty string has no last character to inspect below. */
-   if (len == 0)
-     return build_string ("");
-
-   /* Count the non-word characters, and the words that are followed
-      by a non-word character. */
-   for (i = 0; i < len; i++)
-     if (SYNTAX (syntax_table, p[i]) != Sword)
-       {
-         punct_count++;
-         if (i > 0 && SYNTAX (syntax_table, p[i-1]) == Sword)
-           word_count++;
-       }
-   /* A word that runs up to the end of the string was not counted yet. */
-   if (SYNTAX (syntax_table, p[len-1]) == Sword)
-     word_count++;
-   if (!word_count) return build_string ("");
-
-   /* Room for: every word character, a 5-byte \W\W* between each pair
-      of adjacent words, and a 2-byte \b at either end. */
-   temp = (unsigned char *) alloca (len - punct_count
-                                    + 5 * (word_count - 1) + 4);
-   o = temp;
-   *o++ = '\\';
-   *o++ = 'b';
-
-   for (i = 0; i < len; i++)
-     if (SYNTAX (syntax_table, p[i]) == Sword)
-       *o++ = p[i];
-     else if (i > 0
-              && SYNTAX (syntax_table, p[i-1]) == Sword
-              && --word_count)
-       {
-         /* First delimiter after a word that is not the last word:
-            emit one separator for the whole run of delimiters. */
-         *o++ = '\\';
-         *o++ = 'W';
-         *o++ = '\\';
-         *o++ = 'W';
-         *o++ = '*';
-       }
-
-   *o++ = '\\';
-   *o++ = 'b';
-
-   return make_string (temp, o - temp);
- }
-
- DEFUN ("search-backward", Fsearch_backward, Ssearch_backward, 1, 4,
- "sSearch backward: ",
- "Search backward from point for STRING.\n\
- Set point to the beginning of the occurrence found, and return point.\n\
- An optional second argument bounds the search; it is a buffer position.\n\
- The match found must not extend before that position.\n\
- Optional third argument, if t, means if fail just return nil (no error).\n\
- If not nil and not t, position at limit of search and return nil.\n\
- Optional fourth argument is repeat count--search for successive occurrences.\n\
- See also the functions `match-beginning', `match-end' and `replace-match'.")
- (string, bound, no_error, count)
- Lisp_Object string, bound, no_error, count;
- { /* Lisp primitive search-backward: a thin wrapper around
- search_command.  The -1 is the direction (every backward search in
- this file passes -1, every forward search passes 1).  The final 0 is
- what the plain string searches pass where the regexp and word
- searches pass 1 -- presumably a treat-as-regexp flag; confirm
- against search_command. */
- return search_command (string, bound, no_error, count, -1, 0);
- }
-
- DEFUN ("search-forward", Fsearch_forward, Ssearch_forward, 1, 4, "sSearch: ",
- "Search forward from point for STRING.\n\
- Set point to the end of the occurrence found, and return point.\n\
- An optional second argument bounds the search; it is a buffer position.\n\
- The match found must not extend after that position. nil is equivalent\n\
- to (point-max).\n\
- Optional third argument, if t, means if fail just return nil (no error).\n\
- If not nil and not t, move to limit of search and return nil.\n\
- Optional fourth argument is repeat count--search for successive occurrences.\n\
- See also the functions `match-beginning', `match-end' and `replace-match'.")
- (string, bound, no_error, count)
- Lisp_Object string, bound, no_error, count;
- { /* Lisp primitive search-forward: a thin wrapper around
- search_command.  The 1 is the direction (every forward search in
- this file passes 1, every backward search passes -1).  The final 0
- is what the plain string searches pass where the regexp and word
- searches pass 1 -- presumably a treat-as-regexp flag; confirm
- against search_command. */
- return search_command (string, bound, no_error, count, 1, 0);
- }
-
- DEFUN ("word-search-backward", Fword_search_backward, Sword_search_backward, 1, 4,
- "sWord search backward: ",
- "Search backward from point for STRING, ignoring differences in punctuation.\n\
- Set point to the beginning of the occurrence found, and return point.\n\
- An optional second argument bounds the search; it is a buffer position.\n\
- The match found must not extend before that position.\n\
- Optional third argument, if t, means if fail just return nil (no error).\n\
- If not nil and not t, move to limit of search and return nil.\n\
- Optional fourth argument is repeat count--search for successive occurrences.")
- (string, bound, no_error, count)
- Lisp_Object string, bound, no_error, count;
- { /* Lisp primitive word-search-backward.  STRING is first turned
- into a regexp by wordify (its words joined by \W\W* and wrapped in
- \b), then handed to search_command with direction -1 and a final
- argument of 1, the same value the re-search primitives pass. */
- return search_command (wordify (string), bound, no_error, count, -1, 1);
- }
-
- DEFUN ("word-search-forward", Fword_search_forward, Sword_search_forward, 1, 4,
- "sWord search: ",
- "Search forward from point for STRING, ignoring differences in punctuation.\n\
- Set point to the end of the occurrence found, and return point.\n\
- An optional second argument bounds the search; it is a buffer position.\n\
- The match found must not extend after that position.\n\
- Optional third argument, if t, means if fail just return nil (no error).\n\
- If not nil and not t, move to limit of search and return nil.\n\
- Optional fourth argument is repeat count--search for successive occurrences.")
- (string, bound, no_error, count)
- Lisp_Object string, bound, no_error, count;
- { /* Lisp primitive word-search-forward.  STRING is first turned
- into a regexp by wordify (its words joined by \W\W* and wrapped in
- \b), then handed to search_command with direction 1 and a final
- argument of 1, the same value the re-search primitives pass. */
- return search_command (wordify (string), bound, no_error, count, 1, 1);
- }
-
- DEFUN ("re-search-backward", Fre_search_backward, Sre_search_backward, 1, 4,
- "sRE search backward: ",
- "Search backward from point for match for regular expression REGEXP.\n\
- Set point to the beginning of the match, and return point.\n\
- The match found is the one starting last in the buffer\n\
- and yet ending before the origin of the search.\n\
- An optional second argument bounds the search; it is a buffer position.\n\
- The match found must start at or after that position.\n\
- Optional third argument, if t, means if fail just return nil (no error).\n\
- If not nil and not t, move to limit of search and return nil.\n\
- Optional fourth argument is repeat count--search for successive occurrences.\n\
- See also the functions `match-beginning', `match-end' and `replace-match'.")
- (regexp, bound, no_error, count)
- Lisp_Object regexp, bound, no_error, count;
- { /* Lisp primitive re-search-backward: a thin wrapper around
- search_command with direction -1 (backward).  The final 1 is what
- the regexp and word searches pass where the plain string searches
- pass 0 -- presumably a treat-as-regexp flag; confirm against
- search_command. */
- return search_command (regexp, bound, no_error, count, -1, 1);
- }
-
- DEFUN ("re-search-forward", Fre_search_forward, Sre_search_forward, 1, 4,
- "sRE search: ",
- "Search forward from point for regular expression REGEXP.\n\
- Set point to the end of the occurrence found, and return point.\n\
- An optional second argument bounds the search; it is a buffer position.\n\
- The match found must not extend after that position.\n\
- Optional third argument, if t, means if fail just return nil (no error).\n\
- If not nil and not t, move to limit of search and return nil.\n\
- Optional fourth argument is repeat count--search for successive occurrences.\n\
- See also the functions `match-beginning', `match-end' and `replace-match'.")
- (regexp, bound, no_error, count)
- Lisp_Object regexp, bound, no_error, count;
- { /* Lisp primitive re-search-forward: a thin wrapper around
- search_command with direction 1 (forward).  The final 1 is what
- the regexp and word searches pass where the plain string searches
- pass 0 -- presumably a treat-as-regexp flag; confirm against
- search_command. */
- return search_command (regexp, bound, no_error, count, 1, 1);
- }
-
-
- DEFUN ("replace-match", Freplace_match, Sreplace_match, 1, 3, 0,
- "Replace text matched by last search with NEWTEXT.\n\
- If second arg FIXEDCASE is non-nil, do not alter case of replacement text.\n\
- Otherwise maybe capitalize the whole text, or maybe just word initials,\n\
- based on the replaced text.\n\
- If the replaced text has only capital letters\n\
- and has at least one multiletter word, convert NEWTEXT to all caps.\n\
- If the replaced text has at least one word starting with a capital letter,\n\
- then capitalize each word in NEWTEXT.\n\n\
- If third arg LITERAL is non-nil, insert NEWTEXT literally.\n\
- Otherwise treat `\\' as special:\n\
- `\\&' in NEWTEXT means substitute original matched text.\n\
- `\\N' means substitute what matched the Nth `\\(...\\)'.\n\
- If Nth parens didn't match, substitute nothing.\n\
- `\\\\' means insert one `\\'.\n\
- `\\u' means upcase the next character.\n\
- `\\l' means downcase the next character.\n\
- `\\U' means begin upcasing all following characters.\n\
- `\\L' means begin downcasing all following characters.\n\
- `\\E' means terminate the effect of any `\\U' or `\\L'.\n\
- Case changes made with `\\u', `\\l', `\\U', and `\\L' override\n\
- all other case changes that may be made in the replaced text.\n\
- FIXEDCASE and LITERAL are optional arguments.\n\
- Leaves point at end of replacement text.")
- (newtext, fixedcase, literal)
- Lisp_Object newtext, fixedcase, literal;
- {
-   /* This function can GC */
-   /* Outline:
-      1. Unless FIXEDCASE, scan the matched text and choose a case
-         conversion (case_action) for the replacement.
-      2. Insert the replacement at the start of the match, expanding
-         backslash escapes unless LITERAL, then delete the old text.
-      3. Apply case_action, then the \u \l \U \L \E requests, which
-         step 2 only recorded so that they override everything else.
-      Signals an error if NEWTEXT is not a string or if register 0
-      does not lie inside the accessible part of the buffer.
-      Always returns Qnil. */
-   enum { nochange, all_caps, cap_initial } case_action;
-   Bufpos pos, last;
-   int some_multiletter_word;
-   int some_lowercase;
-   int some_uppercase;
-   int some_nonuppercase_initial;
-   Emchar c, prevc;
-   int inslen;
-   struct buffer *buf = current_buffer;
-   Lisp_Object syntax_table = buf->syntax_table;
-   int mc_count;
-   Lisp_Object buffer;
-   int_dynarr *ul_action_dynarr = 0;
-   int_dynarr *ul_pos_dynarr = 0;
-
-   XSETBUFFER (buffer, buf);
-   CHECK_STRING (newtext, 0);
-
-   case_action = nochange; /* We tried an initialization */
-                           /* but some C compilers blew it */
-
- #ifdef EMACS19_REGEXP
-   if (search_regs.num_regs <= 0)
-     error ("replace-match called before any match found");
- #endif
-
-   if (search_regs.start[0] < BUF_BEGV (buf)
-       || search_regs.start[0] > search_regs.end[0]
-       || search_regs.end[0] > BUF_ZV (buf))
-     args_out_of_range (make_number (search_regs.start[0]),
-                        make_number (search_regs.end[0]));
-
-   if (NILP (fixedcase))
-     {
-       /* Decide how to casify by examining the matched text. */
-
-       last = search_regs.end[0];
-       prevc = '\n';
-
-       /* some_multiletter_word is set nonzero if any original word
-          is more than one letter long. */
-       some_multiletter_word = 0;
-       some_lowercase = 0;
-       some_nonuppercase_initial = 0;
-       some_uppercase = 0;
-
-       for (pos = search_regs.start[0]; pos < last; pos++)
-         {
-           c = BUF_FETCH_CHAR (buf, pos);
-           if (LOWERCASEP (buf, c))
-             {
-               /* Cannot be all caps if any original char is lower case */
-
-               some_lowercase = 1;
-               if (SYNTAX (syntax_table, prevc) != Sword)
-                 some_nonuppercase_initial = 1;
-               else
-                 some_multiletter_word = 1;
-             }
-           else if (!NOCASEP (buf, c))
-             {
-               /* Upper case: only a non-initial one tells us anything
-                  about word length. */
-               some_uppercase = 1;
-               if (SYNTAX (syntax_table, prevc) == Sword)
-                 some_multiletter_word = 1;
-             }
-           else
-             {
-               /* If the initial is a caseless word constituent,
-                  treat that like a lowercase initial. */
-               if (SYNTAX (syntax_table, prevc) != Sword)
-                 some_nonuppercase_initial = 1;
-             }
-
-           prevc = c;
-         }
-
-       /* Convert to all caps if the old text is all caps
-          and has at least one multiletter word. */
-       if (! some_lowercase && some_multiletter_word)
-         case_action = all_caps;
-       /* Capitalize each word, if the old text has all capitalized words. */
-       else if (!some_nonuppercase_initial && some_multiletter_word)
-         case_action = cap_initial;
-       else if (!some_nonuppercase_initial && some_uppercase)
-         /* Should x -> yz, operating on X, give Yz or YZ?
-            We'll assume the latter. */
-         case_action = all_caps;
-       else
-         case_action = nochange;
-     }
-
-   mc_count = begin_multiple_change (buf, search_regs.start[0],
-                                     search_regs.end[0]);
-
-   /* We insert the replacement text before the old text, and then
-      delete the original text. This means that markers at the
-      beginning or end of the original will float to the corresponding
-      position in the replacement. */
-   BUF_SET_PT (buf, search_regs.start[0]);
-   if (!NILP (literal))
-     Finsert (1, &newtext);
-   else
-     {
-       Charcount stlen = string_char_length (XSTRING (newtext));
-       Charcount strpos;
-       struct gcpro gcpro1;
-       GCPRO1 (newtext);
-       for (strpos = 0; strpos < stlen; strpos++)
-         {
-           /* The old text now sits OFFSET characters further on,
-              behind what has been inserted so far. */
-           int offset = BUF_PT (buf) - search_regs.start[0];
-
-           c = string_char (XSTRING (newtext), strpos);
-           /* A backslash that is the very last character has nothing
-              to escape: insert it literally rather than fetching a
-              character from beyond the end of NEWTEXT. */
-           if (c == '\\' && strpos + 1 < stlen)
-             {
-               c = string_char (XSTRING (newtext), ++strpos);
-               if (c == '&')
-                 Finsert_buffer_substring
-                   (buffer,
-                    make_number (search_regs.start[0] + offset),
-                    make_number (search_regs.end[0] + offset));
-               /* Valid register numbers are 0 .. SEARCH_NREGS - 1,
-                  the same range match_limit accepts. */
-               else if (c >= '1' && c <= '9' &&
-                        c - '0' < SEARCH_NREGS (&search_regs))
-                 {
-                   if (search_regs.start[c - '0'] >= 1)
-                     Finsert_buffer_substring
-                       (buffer,
-                        make_number (search_regs.start[c - '0'] + offset),
-                        make_number (search_regs.end[c - '0'] + offset));
-                 }
-               else if (c == 'U' || c == 'u' || c == 'L' || c == 'l' ||
-                        c == 'E')
-                 {
-                   /* Keep track of all case changes requested, but don't
-                      make them now. Do them later so we override
-                      everything else. */
-                   if (!ul_pos_dynarr)
-                     {
-                       ul_pos_dynarr = Dynarr_new (int);
-                       ul_action_dynarr = Dynarr_new (int);
-                     }
-                   Dynarr_add (ul_pos_dynarr, BUF_PT (buf));
-                   Dynarr_add (ul_action_dynarr, c);
-                 }
-               else
-                 buffer_insert_emacs_char (buf, c);
-             }
-           else
-             buffer_insert_emacs_char (buf, c);
-         }
-       UNGCPRO;
-     }
-
-   inslen = BUF_PT (buf) - (search_regs.start[0]);
-   buffer_delete_range (buf, search_regs.start[0] + inslen, search_regs.end[0] +
-                        inslen, 0);
-
-   if (case_action == all_caps)
-     Fupcase_region (make_number (BUF_PT (buf) - inslen),
-                     make_number (BUF_PT (buf)), buffer);
-   else if (case_action == cap_initial)
-     upcase_initials_region (buf, make_number (BUF_PT (buf) - inslen),
-                             make_number (BUF_PT (buf)));
-
-   /* Now go through and make all the case changes that were requested
-      in the replacement string. */
-   if (ul_pos_dynarr)
-     {
-       Bufpos eend = BUF_PT (buf);
-       int i = 0;
-       int cur_action = 'E';
-
-       for (pos = BUF_PT (buf) - inslen; pos < eend; pos++)
-         {
-           Emchar curchar = BUF_FETCH_CHAR (buf, pos);
-           Emchar newchar = -1;
-           /* Consume every request recorded at this position.  Several
-              escapes in a row (e.g. \U\l) share one position; taking
-              only the first would leave the cursor I stuck on an
-              entry that can never match again, silently dropping all
-              later requests. */
-           while (i < Dynarr_length (ul_pos_dynarr) &&
-                  pos == Dynarr_at (ul_pos_dynarr, i))
-             {
-               int new_action = Dynarr_at (ul_action_dynarr, i);
-               i++;
-               if (new_action == 'u')
-                 newchar = UPCASE (buf, curchar);
-               else if (new_action == 'l')
-                 newchar = DOWNCASE (buf, curchar);
-               else
-                 cur_action = new_action;
-             }
-           /* No one-shot \u or \l here: apply the running \U / \L
-              mode, if any. */
-           if (newchar == -1)
-             {
-               if (cur_action == 'U')
-                 newchar = UPCASE (buf, curchar);
-               else if (cur_action == 'L')
-                 newchar = DOWNCASE (buf, curchar);
-               else
-                 newchar = curchar;
-             }
-           if (newchar != curchar)
-             buffer_replace_char (buf, pos, newchar, 0, 0);
-         }
-
-       /* #### will not be freed if an after-change function throws,
-          or whatever */
-       Dynarr_free (ul_action_dynarr);
-       Dynarr_free (ul_pos_dynarr);
-     }
-
-   end_multiple_change (buf, mc_count);
-
-   return Qnil;
- }
-
- /* Common worker for Fmatch_beginning and Fmatch_end.
-    NUM is the register number as a Lisp integer.  Returns the start of
-    that register when BEGINNINGP is nonzero, otherwise its end, as a
-    Lisp integer; returns Qnil when the register did not match.  A NUM
-    outside 0 .. SEARCH_NREGS - 1 is reported via args_out_of_range. */
- static Lisp_Object
- match_limit (Lisp_Object num, int beginningp)
- {
-   /* !!#### This function has not been Mule-ized */
-   int regno;
-
-   CHECK_INT (num, 0);
-   regno = XINT (num);
-
-   if (!(regno >= 0 && regno < SEARCH_NREGS (&search_regs)))
-     args_out_of_range (num, make_number (SEARCH_NREGS (&search_regs)));
-
- #ifdef EMACS19_REGEXP
-   /* No registers have been allocated at all. */
-   if (search_regs.num_regs <= 0)
-     return (Qnil);
- #endif
-
-   /* A negative start marks a register that did not take part. */
-   if (search_regs.start[regno] < 0)
-     return Qnil;
-
-   if (beginningp)
-     return (make_number (search_regs.start[regno]));
-   return (make_number (search_regs.end[regno]));
- }
-
- DEFUN ("match-beginning", Fmatch_beginning, Smatch_beginning, 1, 1, 0,
- "Return position of start of text matched by last regexp search.\n\
- NUM, specifies which parenthesized expression in the last regexp.\n\
- Value is nil if NUMth pair didn't match, or there were less than NUM pairs.\n\
- Zero means the entire text matched by the whole regexp or whole string.")
- (num)
- Lisp_Object num;
- { /* Lisp primitive match-beginning.  Delegates to match_limit; the
- nonzero second argument selects the start of register NUM rather
- than its end. */
- return match_limit (num, 1);
- }
-
- DEFUN ("match-end", Fmatch_end, Smatch_end, 1, 1, 0,
- "Return position of end of text matched by last regexp search.\n\
- NUM specifies which parenthesized expression in the last regexp.\n\
- Value is nil if NUMth pair didn't match, or there were less than NUM pairs.\n\
- Zero means the entire text matched by the whole regexp or whole string.")
- (num)
- Lisp_Object num;
- { /* Lisp primitive match-end.  Delegates to match_limit; the zero
- second argument selects the end of register NUM rather than its
- start. */
- return match_limit (num, 0);
- }
-
- DEFUN ("match-data", Fmatch_data, Smatch_data, 0, 0, 0,
- "Return a list containing all info on what the last regexp search matched.\n\
- Element 2N is `(match-beginning N)'; element 2N + 1 is `(match-end N)'.\n\
- All the elements are markers or nil (nil if the Nth pair didn't match)\n\
- if the last match was on a buffer; integers or nil if a string was matched.\n\
- Use `store-match-data' to reinstate the data in this list.")
- ()
- {
-   /* !!#### This function has not been Mule-ized */
-   Lisp_Object *pairs;    /* start/end slot pair for every register */
-   int regno;
-   int last_matched = -1; /* highest register that matched, -1 if none */
-
-   if (NILP (last_thing_searched))
-     error ("match-data called before any match found");
-
-   pairs = (Lisp_Object *) alloca ((2 * SEARCH_NREGS (&search_regs))
-                                   * sizeof (Lisp_Object));
-
-   for (regno = 0; regno < SEARCH_NREGS (&search_regs); regno++)
-     {
-       Lisp_Object *beg_slot = pairs + 2 * regno;
-       Lisp_Object *end_slot = beg_slot + 1;
-       int start = search_regs.start[regno];
-
-       /* A negative start means this register did not match. */
-       if (start < 0)
-         {
-           *end_slot = Qnil;
-           *beg_slot = Qnil;
-           continue;
-         }
-
-       if (EQ (last_thing_searched, Qt))
-         {
-           /* The match was on a string: report plain integers. */
-           *beg_slot = make_number (start);
-           *end_slot = make_number (search_regs.end[regno]);
-         }
-       else if (BUFFERP (last_thing_searched))
-         {
-           /* The match was on a buffer: report markers in it. */
-           *beg_slot = Fmake_marker ();
-           Fset_marker (*beg_slot,
-                        make_number (start),
-                        last_thing_searched);
-           *end_slot = Fmake_marker ();
-           Fset_marker (*end_slot,
-                        make_number (search_regs.end[regno]),
-                        last_thing_searched);
-         }
-       else
-         /* last_thing_searched must always be Qt, a buffer, or Qnil. */
-         abort ();
-
-       last_matched = regno;
-     }
-
-   /* Drop the trailing run of registers that did not match. */
-   return Flist (2 * last_matched + 2, pairs);
- }
-
-
- DEFUN ("store-match-data", Fstore_match_data, Sstore_match_data, 1, 1, 0,
- "Set internal data on last search match from elements of LIST.\n\
- LIST should have been created by calling `match-data' previously.")
- (list)
- Lisp_Object list;
- {
-   /* !!#### This function has not been Mule-ized */
-   int regno;
-   Lisp_Object elt;
-
-   if (!(CONSP (list) || NILP (list)))
-     list = wrong_type_argument (Qconsp, list);
-
-   /* Until a marker that lives in a buffer turns up in LIST, treat the
-      data as coming from a string match. */
-   last_thing_searched = Qt;
-
- #ifdef EMACS19_REGEXP
-   /* Grow (or create) the register arrays when LIST holds more pairs
-      than there are registers. */
-   {
-     int wanted = XINT (Flength (list)) / 2;
-
-     if (wanted > search_regs.num_regs)
-       {
-         if (search_regs.num_regs != 0)
-           {
-             search_regs.start
-               = (regoff_t *) xrealloc (search_regs.start,
-                                        wanted * sizeof (regoff_t));
-             search_regs.end
-               = (regoff_t *) xrealloc (search_regs.end,
-                                        wanted * sizeof (regoff_t));
-           }
-         else
-           {
-             search_regs.start
-               = (regoff_t *) xmalloc (wanted * sizeof (regoff_t));
-             search_regs.end
-               = (regoff_t *) xmalloc (wanted * sizeof (regoff_t));
-           }
-
-         re_set_registers (&searchbuf, &search_regs, wanted,
-                           search_regs.start, search_regs.end);
-       }
-   }
- #endif /* EMACS19_REGEXP */
-
-   /* Consume LIST two elements (start, end) per register.  Once LIST
-      runs out, Fcar yields nil and the remaining registers are marked
-      unmatched. */
-   for (regno = 0; regno < SEARCH_NREGS (&search_regs); regno++)
-     {
-       elt = Fcar (list);
-
-       if (NILP (elt))
-         {
-           /* Unmatched register: only the start is reset; skip both
-              elements of the pair. */
-           search_regs.start[regno] = -1;
-           list = Fcdr (Fcdr (list));
-           continue;
-         }
-
-       /* Start of the pair. */
-       if (MARKERP (elt))
-         {
-           if (XMARKER (elt)->buffer != 0)
-             XSETBUFFER (last_thing_searched,
-                         XMARKER (elt)->buffer);
-           else
-             /* A marker that points nowhere counts as position 0. */
-             elt = Qzero;
-         }
-
-       CHECK_INT_COERCE_MARKER (elt, 0);
-       search_regs.start[regno] = XINT (elt);
-       list = Fcdr (list);
-
-       /* End of the pair. */
-       elt = Fcar (list);
-       if (MARKERP (elt)
-           && XMARKER (elt)->buffer == 0)
-         elt = Qzero;
-
-       CHECK_INT_COERCE_MARKER (elt, 0);
-       search_regs.end[regno] = XINT (elt);
-
-       list = Fcdr (list);
-     }
-
-   return Qnil;
- }
-
- /* Quote a string to inactivate reg-expr chars: every character that
-    is special in a regexp gets a backslash in front of it. */
-
- DEFUN ("regexp-quote", Fregexp_quote, Sregexp_quote, 1, 1, 0,
- "Return a regexp string which matches exactly STRING and nothing else.")
- (str)
- Lisp_Object str;
- {
-   /* !!#### This function has not been Mule-ized */
-   Bufbyte *src, *stop;
-   Bufbyte *quoted, *dst;
-
-   CHECK_STRING (str, 0);
-
-   /* Worst case every byte needs a backslash, doubling the length. */
-   quoted = (Bufbyte *) alloca (string_length (XSTRING (str)) * 2);
-
-   /* Now copy the data into the scratch buffer, inserting escapes. */
-
-   src = string_data (XSTRING (str));
-   stop = src + string_length (XSTRING (str));
-   dst = quoted;
-
-   while (src != stop)
-     {
-       switch (*src)
-         {
-         case '[':
-         case ']':
-         case '*':
-         case '.':
-         case '\\':
-         case '?':
-         case '+':
-         case '^':
-         case '$':
-           *dst++ = '\\';
-           break;
-         default:
-           break;
-         }
-       *dst++ = *src;
-       src++;
-     }
-
-   return make_string (quoted, dst - quoted);
- }
-
- #ifdef MULE_REGEXP
-
- DEFUN ("re-compile", Fre_compile, Sre_compile, 1, 1, 0,
- "Compile REGEXP by GNU Emacs original regexp compiler,\n\
- and return information of the compiled code by a vector of length 11:\n\
- [ COMPILED-PATTERN (string)\n\
- RE-NSUB REGS-ALLOCATED CAN-BE-NULL NEWLINE-ANCHOR (integers)\n\
- NO-SUB NOT-BOL NOT-EOL SYNTAX (integers)\n\
- FASTMAP TRANSLATE (string) ].\n\
- If REGEXP is nil, just return the information of previously compiled code.")
- (regexp)
- Lisp_Object regexp;
- {
-   Lisp_Object info;
-   Lisp_Object *slot;
-
-   /* A non-nil REGEXP is compiled into the shared searchbuf first;
-      last_regexp is reset to Qnil beforehand. */
-   if (! NILP (regexp))
-     {
-       CHECK_STRING (regexp, 0);
-       last_regexp = Qnil;
-       compile_pattern (regexp, &searchbuf, &search_regs,
-                        (NILP (current_buffer->case_fold_search)
-                         ? 0 : DOWNCASE_TABLE),
-                        0);
-       re_compile_fastmap (&searchbuf);
-     }
-
-   /* Describe searchbuf in an 11-slot vector.  Slots 9 and 10 keep
-      their initial nil when the fastmap or translate table is not
-      available. */
-   info = Fmake_vector (11, Qnil);
-   slot = XVECTOR (info)->contents;
-
-   slot[0] = make_string (searchbuf.buffer, searchbuf.used);
-   slot[1] = make_number (searchbuf.re_nsub);
-   slot[2] = make_number (searchbuf.regs_allocated);
-   slot[3] = make_number (searchbuf.can_be_null);
-   slot[4] = make_number (searchbuf.newline_anchor);
-   slot[5] = make_number (searchbuf.no_sub);
-   slot[6] = make_number (searchbuf.not_bol);
-   slot[7] = make_number (searchbuf.not_eol);
-   slot[8] = make_number (searchbuf.syntax);
-   if (searchbuf.fastmap_accurate && searchbuf.fastmap)
-     slot[9] = make_string (searchbuf.fastmap, 256);
-   if (searchbuf.translate)
-     slot[10] = make_string (searchbuf.translate, 256);
-
-   return info;
- }
-
- #endif
-
-
- /************************************************************************/
- /* initialization */
- /************************************************************************/
-
- void
- syms_of_search (void)
- { /* Registers this file's Lisp-visible names: the two error symbols
- search-failed and invalid-regexp through deferror, then the subr
- object of every primitive through defsubr.  re-compile is registered
- only when MULE_REGEXP is defined.  Takes no arguments and returns
- nothing.  NOTE(review): presumably called once during startup --
- confirm against the caller. */
-
- deferror (&Qsearch_failed, "search-failed", "Search failed", 1);
- deferror (&Qinvalid_regexp, "invalid-regexp", "Invalid regexp", 1);
-
- #ifdef MULE_REGEXP
- defsubr (&Sre_compile);
- #endif /* MULE_REGEXP */
-
- defsubr (&Sstring_match);
- defsubr (&Slooking_at);
- defsubr (&Sskip_chars_forward);
- defsubr (&Sskip_chars_backward);
- defsubr (&Sskip_syntax_forward);
- defsubr (&Sskip_syntax_backward);
- defsubr (&Ssearch_forward);
- defsubr (&Ssearch_backward);
- defsubr (&Sword_search_forward);
- defsubr (&Sword_search_backward);
- defsubr (&Sre_search_forward);
- defsubr (&Sre_search_backward);
- defsubr (&Sreplace_match);
- defsubr (&Smatch_beginning);
- defsubr (&Smatch_end);
- defsubr (&Smatch_data);
- defsubr (&Sstore_match_data);
- defsubr (&Sregexp_quote);
- }
-
- /* Set up this file's C-level state: give the shared pattern buffer
-    searchbuf its initial storage and fastmap, reset last_regexp and
-    last_thing_searched to Qnil and protect them with staticpro, and,
-    when MULE_REGEXP is defined, clear wordbuf and define the
-    Mule-specific Lisp variables. */
- void
- vars_of_search (void)
- {
-   /* !!#### This function has not been Mule-ized */
- #ifdef MULE_REGEXP
-   int i; /* index for clearing wordbuf below */
- #endif /* MULE_REGEXP */
-
-   searchbuf.allocated = 100;
- #ifdef EMACS19_REGEXP
-   searchbuf.buffer = (unsigned char *) xmalloc (searchbuf.allocated);
- #else
-   searchbuf.buffer = (char *) xmalloc (searchbuf.allocated);
- #endif
-   searchbuf.fastmap = search_fastmap;
-
- #ifdef MULE_REGEXP
- #ifdef EMULATE_EMACS19
-   search_regs.num_regs = RE_NREGS;
- #endif /* EMULATE_EMACS19 */
-
-   /* NOTE(review): the inclusive bound assumes wordbuf has
-      MAXWORDBUF + 1 elements -- confirm against its declaration. */
-   for (i = 0; i <= MAXWORDBUF; i++)
-     wordbuf[i] = (struct re_pattern_buffer *)0;
- #endif /* MULE_REGEXP */
-
-   last_regexp = Qnil;
-   staticpro (&last_regexp);
-
-   last_thing_searched = Qnil;
-   staticpro (&last_thing_searched);
-
- #ifdef MULE_REGEXP
-   DEFVAR_LISP ("forward-word-regexp", &Vforward_word_regexp,
-                "*Regular expression to be used in forward-word.");
-   Vforward_word_regexp = Qnil;
-
-   DEFVAR_LISP ("backward-word-regexp", &Vbackward_word_regexp,
-                "*Regular expression to be used in backward-word.");
-   Vbackward_word_regexp = Qnil;
-
-   /* NOTE(review): Vregexp_version is declared as a Lisp_Object at the
-      top of this file, yet it is defined with DEFVAR_INT and assigned
-      raw C integers here -- confirm the intended type. */
-   DEFVAR_INT ("regexp-version", &Vregexp_version,
-               "version number of system internal regexp compiler and interpreter.");
-   Vregexp_version = 0;
- #ifdef EMACS18_REGEXP
-   Vregexp_version = 18;
- #endif /* EMACS18_REGEXP */
- #ifdef EMACS19_REGEXP
-   Vregexp_version = 19;
- #endif /* EMACS19_REGEXP */
- #endif /* MULE_REGEXP */
- }
-